import holoviews as hv
import geoviews as gv
import xarray as xr
import cartopy.crs as ccrs
import hvplot
import hvplot.xarray
import numpy as np
import pop_tools
import geoviews.feature as gf
import intake
import geocat.comp
import yaml
# Use the Bokeh backend for GeoViews interactive plots.
gv.extension('bokeh')
# Load the analysis configuration (e.g. which variables to process) from YAML.
with open("case_config.yml", mode="r") as fptr:
    case_config = yaml.safe_load(fptr)

Spin up a Dask Cluster

from distributed import Client
from ncar_jobqueue import NCARCluster

# Spin up a Dask cluster on NCAR's batch system (PBS under the hood),
# requesting 20 GB of memory per worker job.
cluster = NCARCluster(memory='20 GB')
# Request 80 workers; jobs are submitted asynchronously and join as they start.
cluster.scale(80)
# Attach a client so subsequent Dask computations use this cluster.
client = Client(cluster)
/glade/u/home/mgrover/.local/lib/python3.9/site-packages/dask_jobqueue/core.py:19: FutureWarning: format_bytes is deprecated and will be removed in a future release. Please use dask.utils.format_bytes instead.
  from distributed.utils import format_bytes, parse_bytes, tmpfile
/glade/u/home/mgrover/.local/lib/python3.9/site-packages/dask_jobqueue/core.py:19: FutureWarning: parse_bytes is deprecated and will be removed in a future release. Please use dask.utils.parse_bytes instead.
  from distributed.utils import format_bytes, parse_bytes, tmpfile
/glade/u/home/mgrover/.local/lib/python3.9/site-packages/dask_jobqueue/htcondor.py:6: FutureWarning: parse_bytes is deprecated and will be removed in a future release. Please use dask.utils.parse_bytes instead.
  from distributed.utils import parse_bytes
client

Client

Client-62ad0abd-09e3-11ec-8c31-3cecef1b12ec

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status

Cluster Info

Read in the Data Using Intake-ESM

# Open the intake-ESM catalog describing the high-resolution model output.
data_catalog = intake.open_esm_datastore("../data/hires_catalog.json")
data_catalog

None catalog with 6 dataset(s) from 16028 asset(s):

unique
component 2
stream 5
case 2
member_id 1
variable 471
start_time 102
end_time 102
time_range 102
long_name 454
units 48
vertical_levels 2
frequency 3
path 16028
# Variables of interest come from the case configuration file.
variables = case_config['variables']
# Chunk sizes for lazily opening the netCDF files with Dask.
chunk_spec = {'chunks': {'nlat': 800, 'nlon': 900, 'z_t': 4}}
subset = data_catalog.search(variable=variables)
dsets = subset.to_dataset_dict(cdf_kwargs=chunk_spec)
--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.stream.case'
100.00% [1/1 00:00<00:00]
# Pull out the ocean monthly-history dataset and keep only the requested variables.
dataset_key = 'ocn.pop.h.g.e22.G1850ECO_JRA_HR.TL319_t13.004'
ds = dsets[dataset_key][variables]
ds
<xarray.Dataset>
Dimensions:  (time: 408, z_t: 62, nlat: 2400, nlon: 3600)
Coordinates:
  * z_t      (z_t) float32 500.0 1.5e+03 2.5e+03 ... 5.625e+05 5.875e+05
    ULONG    (nlat, nlon) float64 dask.array<chunksize=(800, 900), meta=np.ndarray>
    ULAT     (nlat, nlon) float64 dask.array<chunksize=(800, 900), meta=np.ndarray>
    TLONG    (nlat, nlon) float64 dask.array<chunksize=(800, 900), meta=np.ndarray>
    TLAT     (nlat, nlon) float64 dask.array<chunksize=(800, 900), meta=np.ndarray>
  * time     (time) object 0002-02-01 00:00:00 ... 0025-01-01 00:00:00
Dimensions without coordinates: nlat, nlon
Data variables:
    TEMP     (time, z_t, nlat, nlon) float32 dask.array<chunksize=(12, 4, 800, 900), meta=np.ndarray>
Attributes:
    cell_methods:            cell_methods = time: mean ==> the variable value...
    source:                  CCSM POP2, the CCSM Ocean Component
    history:                 none
    intake_esm_varname:      ['TEMP']
    time_period_freq:        month_1
    revision:                $Id$
    title:                   g.e22.G1850ECO_JRA_HR.TL319_t13.004
    Conventions:             CF-1.0; http://www.cgd.ucar.edu/cms/eaton/netcdf...
    calendar:                All years have exactly  365 days.
    contents:                Diagnostic and Prognostic Variables
    model_doi_url:           https://doi.org/10.5065/D67H1H0V
    intake_esm_dataset_key:  ocn.pop.h.g.e22.G1850ECO_JRA_HR.TL319_t13.004

Plot the Output

def add_cyclic_point(ds):
    """
    Add a cyclic (wraparound) point in longitude to a POP model output dataset.

    The POP tripole grid's longitude coordinate is discontinuous at the grid
    seam, which confuses plotting libraries. This routine doubles the grid in
    longitude, re-windows it to a monotonic range, and appends one extra
    column so the left and right edges of a global plot join up.

    Parameters
    ----------
    ds : xarray.Dataset
        POP output dataset with 2-D ``TLONG``/``TLAT`` coordinates on
        ``(nlat, nlon)`` dimensions.

    Returns
    -------
    dso : xarray.Dataset
        Modified POP model output dataset with cyclic point added. Horizontal
        variables gain one extra ``nlon`` column; non-horizontal variables and
        coordinates are copied through unchanged.
    """
    ni = ds.TLONG.shape[1]  # number of longitude points on the original grid

    # Window bounds used to re-center the doubled grid back to `ni` columns.
    xL = int(ni / 2 - 1)
    xR = int(xL + ni)

    tlon = ds.TLONG.data
    tlat = ds.TLAT.data

    # Shift longitudes so the field is monotonic before doubling:
    # anything at/above the first-column minimum is wrapped down by 360.
    tlon = np.where(np.greater_equal(tlon, min(tlon[:, 0])), tlon - 360.0, tlon)
    lon = np.concatenate((tlon, tlon + 360.0), 1)
    lon = lon[:, xL:xR]

    if ni == 320:
        # NOTE(review): special-case patch-up for the 320-column POP grid
        # (rows 367+ sit in the tripole region) — TODO confirm against grid docs.
        lon[367:-3, 0] = lon[367:-3, 0] + 360.0
    lon = lon - 360.0

    # Append the first column (+360) to close the seam.
    lon = np.hstack((lon, lon[:, 0:1] + 360.0))
    if ni == 320:
        lon[367:, -1] = lon[367:, -1] - 360.0

    # Trick cartopy into doing the right thing:
    # Cartopy gets confused when the cyclic coords are identical
    lon[:, 0] = lon[:, 0] - 1e-8

    # Latitudes get the same double/window/append treatment; values repeat,
    # so this only adjusts the array shape to match `lon`.
    lat = np.concatenate((tlat, tlat), 1)
    lat = lat[:, xL:xR]
    lat = np.hstack((lat, lat[:, 0:1]))

    TLAT = xr.DataArray(lat, dims=('nlat', 'nlon'))
    TLONG = xr.DataArray(lon, dims=('nlat', 'nlon'))

    dso = xr.Dataset({'TLAT': TLAT, 'TLONG': TLONG})

    # Copy data variables, applying the same cyclic transformation to any
    # variable on the horizontal (nlat, nlon) grid.
    varlist = [v for v in ds.data_vars if v not in ['TLAT', 'TLONG']]
    for v in varlist:
        v_dims = ds[v].dims
        if not ('nlat' in v_dims and 'nlon' in v_dims):
            # Not on the horizontal grid; copy through unchanged.
            dso[v] = ds[v]
        else:
            # Leading (non-horizontal) dimensions, in their original order.
            other_dims = tuple(d for d in v_dims if d not in ('nlat', 'nlon'))
            lon_dim = v_dims.index('nlon')
            # Assumes nlon is the last dimension and nlat the second-to-last
            # (the `[..., :, xL:xR]` slice relies on it) — true for POP output.
            field = ds[v].data
            field = np.concatenate((field, field), lon_dim)
            field = field[..., :, xL:xR]
            field = np.concatenate((field, field[..., :, 0:1]), lon_dim)
            dso[v] = xr.DataArray(field, dims=other_dims + ('nlat', 'nlon'), attrs=ds[v].attrs)

    # Copy coordinates that are not on the horizontal grid (e.g. time, z_t);
    # horizontal coordinates were rebuilt above.
    for v, da in ds.coords.items():
        if not ('nlat' in da.dims and 'nlon' in da.dims):
            dso = dso.assign_coords(**{v: da})
    return dso
# Subset to the configured variables, then compute a monthly climatology.
ds_sub = ds[variables]
climo = geocat.comp.climatology(ds_sub, 'month')
# Keep only the top ten vertical levels for plotting.
top_ten_levs = climo.isel(z_t=range(10))
%%time
# Add the cyclic longitude point so global plots wrap cleanly.
cyclic_ds = add_cyclic_point(top_ten_levs)
CPU times: user 42.3 s, sys: 1.79 s, total: 44.1 s
Wall time: 2min 16s
# Promote the rebuilt TLAT/TLONG data variables back to coordinates so
# plotting tools can use them as the x/y axes.
cyclic_ds = cyclic_ds.set_coords(['TLAT', 'TLONG'])
# Restart the workers to free memory accumulated during the computation.
client.restart()

Client

Client-c6d51787-09cb-11ec-a95b-3cecef1b12ec

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status

Cluster Info

# Rasterized quadmesh of temperature on the curvilinear POP grid,
# reprojected to a Robinson projection, with coastlines overlaid.
temp_mesh = cyclic_ds.TEMP.hvplot.quadmesh(
    x='TLONG',
    y='TLAT',
    cmap='magma',
    rasterize=True,
    crs=ccrs.PlateCarree(),
    projection=ccrs.Robinson(),
    project=True,
)
pop_plot = temp_mesh * gf.coastline
pop_plot